package com.zx.reader.impl;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import com.zx.exception.DataParseException;
import com.zx.reader.FileReader;
import com.zx.util.StringUtils;

/**
 * Reader for legacy Excel ({@code .xls}) workbooks.
 * <p>
 * Because of the Excel file format, the whole workbook is converted to text
 * once, in the constructor, and kept in memory; the {@code read} methods only
 * slice that text into rows and cells.
 * <p>
 * Known limitation: sheets are told apart by the sheet-name lines present in
 * the extracted text. A data row whose entire text equals the name of the
 * sheet that follows it is indistinguishable from that sheet's header line
 * and will be taken as the start of the next sheet.
 *
 * @Project: dataParse-core
 * @Title: ExcelFileReader
 * @Description: Excel reader
 * @author: zhangxue
 * @date: 2018年2月19日下午1:45:47
 * @company: yooli
 * @Copyright: Copyright (c) 2015
 * @version v1.0
 */
public class ExcelFileReader implements FileReader {
	
	/** Separator between cells inside one extracted row. */
	private static final String CELL_SEPARATOR = "\t";
	
	/** Separator between rows in the extracted text. */
	private static final String LINE_SEPARATOR = "\n";
	
	/**
	 * Every line of the text extracted from the workbook, sheet-name lines included.
	 */
	private String[] lines;
	
	/** Names of all sheets in the workbook: key = sheet name, value = 1-based sheet position. */
	private Map<String, Integer> sheetNames;
	
	/** Sheet names in workbook order; element {@code i} is the name of sheet {@code i + 1}. */
	private String[] sheetOrder;
	
	/** Number of sheets in the workbook. */
	private int sheetSize;
	
	/**
	 * Loads the workbook at {@code filePath} and extracts its full text into memory.
	 *
	 * @param filePath path of the {@code .xls} file to read
	 * @throws Exception a {@link DataParseException} wrapping any failure to open or parse the file
	 */
	public ExcelFileReader(String filePath) throws Exception {
		FileInputStream in = null;
		try {
			in = new FileInputStream(filePath);
			// POI file system: loads the xls container
			POIFSFileSystem fs = new POIFSFileSystem(in);
			// wrap the loaded container as a workbook object
			HSSFWorkbook xls = new HSSFWorkbook(fs);
			// number of sheets in the workbook
			this.sheetSize = xls.getNumberOfSheets();
			this.sheetNames = new HashMap<String, Integer>();
			this.sheetOrder = new String[this.sheetSize];
			for (int i = 0; i < this.sheetSize; i++) {
				String name = xls.getSheetName(i);
				this.sheetOrder[i] = name;
				this.sheetNames.put(name, i + 1);
			}
			// POI text extraction tool for xls content
			ExcelExtractor extractor = new ExcelExtractor(xls);
			// sheet names MUST be included: they are the only markers that
			// separate one sheet's rows from the next in the extracted text
			extractor.setIncludeSheetNames(true);
			// output computed formula results rather than the formula text
			extractor.setFormulasNotResults(false);
			// keep blank cells so that cell positions stay aligned with columns
			extractor.setIncludeBlankCells(true);
			String text = extractor.getText();
			this.lines = text.split(LINE_SEPARATOR);
		} catch (Exception e) {
			throw new DataParseException(e, "excel读取异常");
		} finally {
			try {
				if(in != null) {
					in.close();
				}
			} catch (IOException e) {
				// best effort: the content is already in memory (or the
				// original failure is already propagating), so only report it
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * Reads all rows of the sheet with the given name.
	 *
	 * @param sheetName name of the sheet to read
	 * @return one {@code String[]} per extracted row, cells in column order;
	 *         trailing blank cells are preserved as empty strings
	 * @throws DataParseException if the workbook has no sheet with that name
	 */
	public List<String[]> read(String sheetName) throws DataParseException {
		Integer sheetIndex = this.sheetNames.get(sheetName);
		if(sheetIndex == null) {
			throw new DataParseException("读取的" + sheetName + "不存在");
		}
		return readSheet(sheetIndex.intValue());
	}
	
	/**
	 * Reads all rows of the sheet at the given position, counting from 1.
	 *
	 * @param sheetIndex 1-based position of the sheet
	 * @return one {@code String[]} per extracted row, cells in column order;
	 *         trailing blank cells are preserved as empty strings
	 * @throws DataParseException if {@code sheetIndex} is not between 1 and the number of sheets
	 */
	public List<String[]> read(int sheetIndex) throws DataParseException{
		if(sheetIndex > this.sheetSize || sheetIndex <= 0) {
			throw new DataParseException("读取的第" + sheetIndex + "个sheet页不存在");
		}
		return readSheet(sheetIndex);
	}
	
	/**
	 * Collects the rows that belong to the sheet at the given 1-based position.
	 * <p>
	 * Relies on the extracted text listing the sheets in workbook order, each
	 * one introduced by a line holding its name. Only the name of the NEXT
	 * expected sheet is accepted as a header, so a data row that happens to
	 * equal the name of the current or an earlier sheet stays a data row.
	 *
	 * @param sheetIndex 1-based sheet position, already validated by the caller
	 * @return the rows of that sheet, each split into cells
	 */
	private List<String[]> readSheet(int sheetIndex) {
		List<String[]> data = new ArrayList<String[]>();
		// 1-based position of the sheet currently being scanned; 0 = before the first header
		int current = 0;
		for (String line : this.lines) {
			if(current < this.sheetSize && line.equals(this.sheetOrder[current])) {
				current++;
				if(current > sheetIndex) {
					// the requested sheet is finished, nothing more to collect
					break;
				}
				continue;
			}
			if(current == sheetIndex) {
				// limit -1 keeps trailing empty cells, which a plain split would drop
				data.add(line.split(CELL_SEPARATOR, -1));
			}
		}
		return data;
	}

	/**
	 * Nothing to release: the file stream is closed in the constructor and
	 * the extracted text is held only in memory.
	 */
	@Override
	public void close() {
		
	}
	
}
